# Replication code for Taylor C. Boas and Amy Erica Smith, “Looks Like Me, Thinks Like Me: Descriptive Representation and Opinion Congruence in Brazil.” Latin American Research Review 54, 2 (2019).

# Analysis conducted in R 3.6.0 on MacOS 10.13.6

# NOTE: This file replicates Appendix Tables 3 and 10 as well as a result conveyed textually in the Appendix. We recommend running R replication files in the following order; please see readme.txt for details.
# 	1_merge_lapop.R
# 	2_merge_latinobarometro.R
# 	3_recode_reshape.R
# 	4_difference_in_distributions.R
# 	5_regressions.R
# 	6_civil_society_meeting.R
# 	7_mass_descriptives.R
# 	8_elite_descriptives.R
# 	9_mean_differences.R
#	10_difference_in_distributions_ks.R
#	11_elite_sample_simulation.R
#	12_converts_vs_lifelong.R

# Set working directory as appropriate
# setwd('~/Dropbox/brazil_leg_surveys/replication/')

# Load packages and clear the workspace. Please make sure all necessary packages are installed.

library(Hmisc)
library(readstata13)
library(foreign)

# Start from an empty global environment so objects left over from earlier
# replication scripts cannot leak into this one. The argument name is spelled
# out (all.names) instead of relying on partial matching, and TRUE is used
# instead of the reassignable shortcut T. Attached packages are unaffected.
rm(list = ls(all.names = TRUE))

# Load Democratic Accountability and Linkages Project file. Please visit https://sites.duke.edu/democracylinkage/ to download and put the file in the working directory. We have confirmed that the replication code works with the version of this file downloaded in early June 2019. If the filename that you download differs from that below, the data file may have changed, and some revision of the code may be necessary to replicate the published results.

dalp <- read.dta('partylevel_20130907.dta')
# Keep the Brazilian parties only. %in% never returns NA, so a row with a
# missing country is dropped instead of being turned into an all-NA row
# (which is what logical indexing with == does).
dalp <- dalp[dalp$country %in% 'Brazil', ]

# Load candidate data files from Brazil's Tribunal Superior Eleitoral. These were read from CSV into R but not further processed.
# NOTE(review): these files are expected to provide the objects cands98,
# cands02, cands06 and cands10 used below -- confirm against the .RData files.

load('cand.data.98.RData')
load('cand.data.02.RData')
load('cand.data.06.RData')
load('cand.data.10.RData')

# Load data on black and evangelical legislators. Data on black legislators are from Race, Politics, and Education in Brazil: Affirmative Action in Higher Education, Edited by Ollie A. Johnson III and Rosana Heringer (Palgrave Macmillan, 2015), Appendices 1.1 and 1.2. See main text for evangelical data sources.

black.leg.10 <- read.csv('bancada_negra_2010_johnson.csv')
black.leg.06 <- read.csv('bancada_negra_2006_johnson.csv')
evang.leg <- read.dta13('bancadas.evangelicas_pluscedi.dta')

# Load the legislator survey data.
# NOTE(review): expected to provide the data frame elite.data used below,
# presumably written by one of the earlier replication scripts -- confirm.
load('elite.data.RData')

# Restrict each election's candidate file to winners of federal legislative
# races (federal deputies and senators).
#
# The status label for deputies elected on remainders was written 'MÉDIA' for
# 1998, 2006 and 2010 but 'ELEITO POR MÉDIA' for 2002. Both spellings are
# accepted for every year, so no winner is dropped if a file uses the other
# label; the table below divides by a fixed 594 seats and therefore relies on
# the filter keeping every winner.
congress_offices <- c('DEPUTADO FEDERAL','SENADOR')
elected_status <- c('ELEITO','MÉDIA','ELEITO POR MÉDIA')

# Rows of `cands` that are elected candidates for Congress.
keep_elected_congress <- function(cands) {
  cands[cands$DESCRICAO_CARGO %in% congress_offices &
          cands$DESC_SIT_TOT_TURNO %in% elected_status, ]
}

# Rows of an already filtered candidate file that are senators.
senators_only <- function(cands) {
  cands[cands$DESCRICAO_CARGO == 'SENADOR', ]
}

cands98 <- keep_elected_congress(cands98)
cands02 <- keep_elected_congress(cands02)
cands06 <- keep_elected_congress(cands06)
cands10 <- keep_elected_congress(cands10)

# Each legislature = everyone elected in its own election plus the senators
# elected in the previous one.
leg52 <- rbind(senators_only(cands98), cands02)
leg53 <- rbind(senators_only(cands02), cands06)
leg54 <- rbind(senators_only(cands06), cands10)

# Birth year is cut out of the date string; the position of the year depends
# on the string length, which differs across source files.
# NOTE(review): the layouts are presumably DDMMYYYY (8 characters),
# DD/MM/YYYY (10) and DD-MON-YY (9) -- confirm against the raw TSE files.

# 52nd legislature: characters 5-8 are taken for every row.
leg52$birthyear <- as.numeric(substr(leg52$DATA_NASCIMENTO, 5, 8))

# 53rd legislature: 8- and 10-character dates; any other length stays NA.
is8.53 <- nchar(leg53$DATA_NASCIMENTO) == 8
is10.53 <- nchar(leg53$DATA_NASCIMENTO) == 10
leg53$birthyear <- NA
leg53$birthyear[is8.53] <- as.numeric(substr(leg53$DATA_NASCIMENTO[is8.53], 5, 8))
leg53$birthyear[is10.53] <- as.numeric(substr(leg53$DATA_NASCIMENTO[is10.53], 7, 10))

# 54th legislature: 10- and 9-character dates; any other length stays NA.
# The 9-character form carries a two-digit year, which is prefixed with '19'.
is10.54 <- nchar(leg54$DATA_NASCIMENTO) == 10
is9.54 <- nchar(leg54$DATA_NASCIMENTO) == 9
leg54$birthyear <- NA
leg54$birthyear[is10.54] <- as.numeric(substr(leg54$DATA_NASCIMENTO[is10.54], 7, 10))
leg54$birthyear[is9.54] <- as.numeric(paste0('19', substr(leg54$DATA_NASCIMENTO[is9.54], 8, 9)))

# Age is measured in the year of the BLS survey that each legislature is
# compared with (2005, 2009, 2013).
leg52$age <- 2005 - leg52$birthyear
leg53$age <- 2009 - leg53$birthyear
leg54$age <- 2013 - leg54$birthyear

# Tag each row with its legislature number and stack the three legislatures.
leg52$leg <- 52
leg53$leg <- 53
leg54$leg <- 54
leg.data <- rbind(leg52, leg53, leg54)

# ===========================================================
# Appendix Table 3: BLS Surveys vs. Corresponding Legislature
# ===========================================================

# Build Appendix Table 3. Every row holds six numbers: the three BLS survey
# waves followed by the three corresponding legislatures. Each row is stored
# under its own label, so a label can no longer drift away from the numbers
# it describes. (Previously labels were assigned by position afterwards, and
# the Center-West and Northeast rows ended up under each other's label.)

# Mean of x within each BLS survey wave / within each legislature.
# na.rm = TRUE is a no-op for the %in% indicators, which are never NA.
bls_mean <- function(x) tapply(x, elite.data$elite_year, mean, na.rm = TRUE)
leg_mean <- function(x) tapply(x, leg.data$leg, mean, na.rm = TRUE)

# Denominator for legislature-wide proportions (513 deputies + 81 senators).
seats <- 594

n.row <- list(
  '$N$' = c(tapply(elite.data$elite_year, elite.data$elite_year, length),
            rep(seats, 3))
)

# The race item is only used for survey waves after 2005, and there is no
# list of black legislators for the 52nd legislature; those cells stay NA.
after.2005 <- elite.data$elite_year > 2005

demog.rows <- list(
  # NOTE(review): assumes evang.leg has one row per evangelical legislator and
  # exactly three values of year, ordered like legislatures 52-54 -- confirm.
  'Evangelical' = c(bls_mean(elite.data$elite_evang),
                    table(evang.leg$year) / seats),
  'Female' = c(bls_mean(elite.data$elite_female),
               leg_mean(leg.data$DESCRICAO_SEXO == 'FEMININO')),
  'Afro-Brazilian' = c(NA,
                       tapply(elite.data$elite_black[after.2005],
                              elite.data$elite_year[after.2005],
                              mean, na.rm = TRUE),
                       NA,
                       nrow(black.leg.06) / seats,
                       nrow(black.leg.10) / seats),
  'No College' = c(bls_mean(elite.data$elite_loweduc),
                   leg_mean(!(leg.data$DESCRICAO_GRAU_INSTRUCAO %in%
                                c('SUPERIOR COMPLETO','SUPERIOR INCOMPLETO')))),
  'Average Age' = c(bls_mean(elite.data$elite_age),
                    leg_mean(leg.data$age))
)

# States belonging to each region; the list names become the row labels.
region.states <- list(
  'South' = c('PR','RS','SC'),
  'Southeast' = c('SP','RJ','ES','MG'),
  'Center-West' = c('MT','MS','GO','DF'),
  'North' = c('AM','RR','AP','PA','TO','RO','AC'),
  'Northeast' = c('MA','PI','CE','RN','PE','PB','SE','AL','BA')
)
region.rows <- lapply(region.states, function(ufs) {
  c(bls_mean(elite.data$elite_uf %in% ufs),
    leg_mean(leg.data$SIGLA_UF %in% ufs))
})

# Party shares. == (not %in%) is used on purpose: a missing party yields NA
# and is dropped from the mean by na.rm = TRUE rather than counted as a
# non-member.
party.rows <- list(
  'PT' = c(bls_mean(elite.data$elite_partythen == 'PT'),
           leg_mean(leg.data$SIGLA_PARTIDO == 'PT')),
  'PMDB' = c(bls_mean(elite.data$elite_partythen == 'PMDB'),
             leg_mean(leg.data$SIGLA_PARTIDO == 'PMDB')),
  'PSDB' = c(bls_mean(elite.data$elite_partythen == 'PSDB'),
             leg_mean(leg.data$SIGLA_PARTIDO == 'PSDB')),
  'DEM' = c(bls_mean(elite.data$elite_partythen == 'DEM' |
                       elite.data$elite_partythen == 'PFL'),
            leg_mean(leg.data$SIGLA_PARTIDO == 'DEM' |
                       leg.data$SIGLA_PARTIDO == 'PFL'))
)

table.rows <- c(n.row, demog.rows, region.rows, party.rows)

# rbind() would silently recycle a row of the wrong length and shift the
# table, so fail loudly instead.
stopifnot(all(lengths(table.rows) == 6))

descr.table <- do.call(rbind, table.rows)
descr.table <- round(descr.table, 2)

# Interleave the columns so each survey sits next to its legislature.
descr.table <- descr.table[, c(1, 4, 2, 5, 3, 6)]

# Convert to character for printing; apply() drops the row names, so they are
# saved first and restored together with the column headings.
row.labels <- rownames(descr.table)
descr.table <- apply(descr.table, 2, as.character)
rownames(descr.table) <- row.labels
colnames(descr.table) <- c('BLS 2005','52nd Leg.','BLS 2009','53rd Leg.','BLS 2013','54th Leg.')

# Age is shown in whole years; empty cells are printed as blanks.
descr.table['Average Age',] <- as.character(round(as.numeric(descr.table['Average Age',]), 0))
descr.table[is.na(descr.table)] <- ''

bottom.note<-'NOTE: Entries are proportions, except for average age and $N$. Age is measured as of the year of each BLS survey. Party is measured at the time of election. Data on legislatures are from the Superior Electoral Tribunal (TSE) and correspond to election winners; they do not account for leaves of absence or replacements (\\textit{suplentes}).'

# Write the LaTeX table; row-group sizes follow the row lists built above.
descr.table.latex <- latex(
  descr.table,
  file = 'descr_table.tex',
  collabel.just = rep('c', 6),
  col.just = rep('c', 6),
  rowlabel = '',
  rgroup = c('','Demographics','Region','Party'),
  n.rgroup = c(length(n.row), length(demog.rows), length(region.rows), length(party.rows)),
  caption = 'BLS Surveys vs. Corresponding Legislature',
  booktabs = FALSE,
  ctable = TRUE,
  where = "htp",
  insert.bottom = bottom.note
)

# =================================================================
# Appendix Table 10: Measures of Clientelism: Brazilian Legislative
# Surveys vs. Democratic Accountability and Linkages Project
# =================================================================

# Harmonize party acronyms across the two datasets: a party that was renamed
# or merged is given a single combined label, and a spelling that differs
# between the datasets is aligned.
elite.data$elite_partythen[elite.data$elite_partythen %in% c('PFL','DEM')] <- 'DEM/PFL'
elite.data$elite_partythen[elite.data$elite_partythen %in% c('PL','PR')] <- 'PR/PL'
dalp$pnatacro[dalp$pnatacro %in% 'PC do BR'] <- 'PC do B'
dalp$pnatacro[dalp$pnatacro %in% 'PFL'] <- 'DEM/PFL'
dalp$pnatacro[dalp$pnatacro %in% 'PL'] <- 'PR/PL'

# BLS side: party means of the clientelism item, leaving out the parties
# listed below.
# NOTE(review): presumably these are the parties without a DALP counterpart --
# confirm.
excluded.parties <- c('PMN','PSOL','PRB','PV','PSC','PSD','OUTRO')
in.comparison <- !(elite.data$elite_partythen %in% excluded.parties)
bls_est <- round(
  tapply(elite.data$elite_clientelism[in.comparison],
         elite.data$elite_partythen[in.comparison],
         mean, na.rm = TRUE),
  3
)

# DALP side: b15 shifted and scaled so that 5 maps to 0 and 20 maps to 1.
# NOTE(review): the function returns one value per input row, so this assumes
# a single DALP row per party -- confirm.
dalp_est <- round(
  tapply(dalp$b15, dalp$pnatacro, function(score) (score - 5) / 15),
  3
)

# Rank 1 = highest score on each measure.
bls_rank <- rank(-bls_est)
dalp_rank <- rank(-dalp_est)

# NOTE(review): data.frame() lines the four vectors up by position, not by
# party name; this assumes both tapply() results cover the same parties in
# the same order -- confirm.
client_table <- data.frame(bls_est, bls_rank, dalp_est, dalp_rank)
by.bls.rank <- order(client_table$bls_rank)
client_table <- client_table[by.bls.rank, ]

bottom.note<-'NOTE: Entries are party-level averages. Measures of clientelism are the variable CLIENTS from the Brazilian Legislative Surveys (BLS) and the variable b15 from the Democratic Accountability and Linkages Project (DALP). Both are rescaled from 0 (theoretical minimum) to 1 (theoretical maximum).'

# Write the LaTeX table with a Score/Rank column pair under each source.
client.table.latex <- latex(
  client_table,
  file = 'client_table.tex',
  colheads = rep(c('Score','Rank'), 2),
  collabel.just = rep('c', 4),
  col.just = rep('c', 4),
  cgroup = c('BLS','DALP'),
  n.cgroup = c(2, 2),
  rowlabel = 'Party',
  caption = 'Measures of Clientelism: Brazilian Legislative Surveys vs. Democratic Accountability and Linkages Project',
  booktabs = FALSE,
  ctable = TRUE,
  where = "htp",
  insert.bottom = bottom.note
)

# ==========================
# Results conveyed textually
# ==========================

# Appendix: "There is somewhat greater divergence in rank ordering among the most clientelistic parties, but overall, the measures are highly correlated (r = 0.86, p < 0.001)."

# Pearson correlation (with its significance test) between the rounded
# party-level BLS and DALP clientelism scores.
cor.test(x = bls_est, y = dalp_est, method = 'pearson')
